pacman::p_load(tidyverse, data.table, broom)
rm(list=ls())
################################################################################


#################### Brazil

for(commodity in c('beef','soybean','maize')){
  
  df = fread(paste0("../../data/agribusiness/clean/",commodity,"_brazil_county_hub_distance.csv.gz"))
  
  df_dist = df[,list(product_type,county_id,county_id_hub,distance_km)][, lapply(.SD, median), by=.(product_type,county_id, county_id_hub)]
  df_size_x_i = df[,list(product_type,county_id,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type,county_id)]
  df_size_x_j = df[,list(product_type,county_id_hub,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type,county_id_hub)]
  df_size_y_j = df[,list(product_type,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type)]
  df = merge(df_size_x_j,df_size_y_j,by=c('product_type'))

  df$size_q = df$equivalent_tonnes.x
  df$share_q = df$equivalent_tonnes.x/df$equivalent_tonnes.y
  df_size = df[,list(product_type,county_id_hub,share_q,size_q)]
  df = merge(df_dist,df_size,by=c('product_type','county_id_hub'))
  df = merge(df,df_size_x_i,by=c('product_type','county_id'))
  setnames(df, old='equivalent_tonnes', new='size_q_i')
  df_main = df
  
  #Distance to nearest hub
  df = df_main
  df$ma = 1/(df$distance_km+1)
  df = df %>% group_by(product_type,county_id) %>% top_n(1, ma)
  df$ma_type = 'nearest_hub'
  df_ma1 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]
  
  #Hub size-weighted distance to nearest hub
  df = df_main
  df$ma = df$share_q/(df$distance_km+1)
  df = df %>% group_by(product_type,county_id) %>% top_n(1, ma)
  df$ma_type = 'nearest_hub_weighted'
  df_ma2 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]
  
  #Market access measure
  df = df_main
  df$ma = df$share_q/(df$distance_km+1)
  df = df[,list(product_type,county_id,size_q_i,ma)][, lapply(.SD, sum), by=.(product_type,county_id,size_q_i)]
  df$ma_type = 'all_hubs_weighted'
  df_ma3 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]
  
  df=rbind(df_ma1,df_ma2,df_ma3)
  df$country='BRAZIL'
  df$commodity=commodity
  
  if(commodity=='beef'){df_brazil_beef = df}
  if(commodity=='soybean'){df_brazil_soybean = df}
  if(commodity=='maize'){df_brazil_maize= df}
  
}


#################### Argentina

df = fread(paste0("../../data/agribusiness/clean/soybean_argentina_county_port_distance.csv.gz"))

df_dist = df[,list(product_type,county_id,county_id_port,distance_km)][, lapply(.SD, median), by=.(product_type,county_id, county_id_port)]
df_size_x_i = df[,list(product_type,county_id,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type,county_id)]
df_size_x_j = df[,list(product_type,county_id_port,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type,county_id_port)]
df_size_y_j = df[,list(product_type,equivalent_tonnes)][, lapply(.SD, sum), by=.(product_type)]
df = merge(df_size_x_j,df_size_y_j,by=c('product_type'))

df$size_q = df$equivalent_tonnes.x
df$share_q = df$equivalent_tonnes.x/df$equivalent_tonnes.y
df_size = df[,list(product_type,county_id_port,share_q,size_q)]
df = merge(df_dist,df_size,by=c('product_type','county_id_port'))
df = merge(df,df_size_x_i,by=c('product_type','county_id'))
setnames(df, old='equivalent_tonnes', new='size_q_i')
df_main = df

#Distance to nearest hub
df = df_main
df$ma = 1/(df$distance_km+1)
df = df %>% group_by(product_type,county_id) %>% top_n(1, ma)
df$ma_type = 'nearest_hub'
df_ma1 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]

#Hub size-weighted distance to nearest hub
df = df_main
df$ma = df$share_q/(df$distance_km+1)
df = df %>% group_by(product_type,county_id) %>% top_n(1, ma)
df$ma_type = 'nearest_hub_weighted'
df_ma2 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]

#Market access measure
df = df_main
df$ma = df$share_q/(df$distance_km+1)
df = df[,list(product_type,county_id,size_q_i,ma)][, lapply(.SD, sum), by=.(product_type,county_id,size_q_i)]
df$ma_type = 'all_hubs_weighted'
df_ma3 = setDT(df)[,list(product_type,county_id,ma,ma_type,size_q_i)]

df=rbind(df_ma1,df_ma2,df_ma3)
df$country='ARGENTINA'
df$commodity='soybean'
df_argentina_soybean = df


#################### Bind all
df = rbind(df_brazil_beef,df_brazil_soybean,df_brazil_maize,df_argentina_soybean)
write.csv(df, gzfile(paste0("../../data/agribusiness/clean/distance_to_hub_all.csv.gz")), row.names = FALSE)

